library(ggplot2)
library(devtools)
library(phyloseq)
library(picante)
library(tidyr)
library(forcats)
library(dplyr)
library(tibble)
library(cowplot)
library(picante) # Will also include ape and vegan
library(car) # For residual analysis
library(sandwich) # for vcovHC function in post-hoc test
library(MASS) # For studres in plot_residuals function
library(caret) # For cross validation
library(pander) # For pretty tables
source("code/Muskegon_functions.R")
source("code/set_colors.R")# Loads a phyloseq object named otu_merged_musk_pruned)
load("data/surface_PAFL_otu_pruned_raw.RData")
# The name of the phyloseq object is:
surface_PAFL_otu_pruned_raw ## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 7806 taxa and 24 samples ]
## sample_data() Sample Data: [ 24 samples by 44 sample variables ]
## tax_table() Taxonomy Table: [ 7806 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 7806 tips and 7804 internal nodes ]
# Remove doubletons!
surface_PAFL_otu_pruned_rm2 <- prune_taxa(taxa_sums(surface_PAFL_otu_pruned_raw) > 2, surface_PAFL_otu_pruned_raw)
surface_PAFL_otu_pruned_rm2## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 2979 taxa and 24 samples ]
## sample_data() Sample Data: [ 24 samples by 44 sample variables ]
## tax_table() Taxonomy Table: [ 2979 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 2979 tips and 2977 internal nodes ]
# Remove tree for computational efficiency
surface_PAFL_otu_pruned_notree_rm2 <- phyloseq(tax_table(surface_PAFL_otu_pruned_rm2), otu_table(surface_PAFL_otu_pruned_rm2), sample_data(surface_PAFL_otu_pruned_rm2))
# Gather the metadata in a dataframe to play with
metadata <- data.frame(sample_data(surface_PAFL_otu_pruned_notree_rm2)) %>%
mutate(fraction = factor(fraction, levels = c("WholePart","WholeFree")),
lakesite = factor(lakesite, levels = c("MOT", "MDP", "MBR", "MIN")),
fraction = fct_recode(fraction, Particle = "WholePart", Free = "WholeFree"))scaled_surface_PAFLA_pruned_rm2 <- scale_reads(surface_PAFL_otu_pruned_notree_rm2)
set.seed(777)
# Calculate the SOREN distance
soren_whole_dist <- ordinate(
physeq = scaled_surface_PAFLA_pruned_rm2,
method = "PCoA",
distance = "bray",
binary = TRUE) # Make it presence/absence
p1 <- plot_ordination(
physeq = scaled_surface_PAFLA_pruned_rm2,
ordination = soren_whole_dist,
color = "fraction",
shape = "lakesite",
title = "Sørensen") +
geom_point(size=5, alpha = 0.7, aes(fill =fraction, color = fraction)) +
scale_colour_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = c(21, 22, 23, 24)) +
theme(legend.position = "bottom")
# Calculate the BRAY-CURTIS distance
bray_whole_dist <- ordinate(
physeq = scaled_surface_PAFLA_pruned_rm2,
method = "PCoA",
distance = "bray",
binary = FALSE) # Make it Abundance weighted
p2 <- plot_ordination(
physeq = scaled_surface_PAFLA_pruned_rm2,
ordination = bray_whole_dist ,
color = "fraction",
shape = "lakesite",
title = "Bray-Curtis") +
geom_point(size=5, alpha = 0.7, aes(fill =fraction, color = fraction)) +
scale_colour_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = c(21, 22, 23, 24)) +
theme(legend.position = "bottom")
plot_grid(p1, p2, labels = c("A", "B"), align = "h", ncol = 2)######################################################### Fraction ABUNDANCe
frac_abund_wilcox <- wilcox.test(log10(as.numeric(fraction_bac_abund)) ~ fraction, data = metadata)
frac_abund_wilcox##
## Wilcoxon rank sum test
##
## data: log10(as.numeric(fraction_bac_abund)) by fraction
## W = 0, p-value = 1.479e-06
## alternative hypothesis: true location shift is not equal to 0
metadata %>%
group_by(fraction) %>%
summarize(mean(as.numeric(fraction_bac_abund)))## # A tibble: 2 × 2
## fraction `mean(as.numeric(fraction_bac_abund))`
## <fctr> <dbl>
## 1 Particle 41168.88
## 2 Free 734522.25
# Make a data frame to draw significance line in boxplot (visually calculated)
dat1 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(6.45,6.5,6.5,6.45)) # WholePart vs WholeFree
poster_a <- ggplot(filter(metadata, norep_filter_name != "MOTEJ515"),
aes(y = log10(as.numeric(fraction_bac_abund)), x = fraction)) +
#scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes( fill = fraction)) +
ylab("Log10(Bacterial Cells/mL)") +
##### Particle vs free cell abundances
geom_path(data = dat1, aes(x = a, y = b), linetype = 1, color = "gray40") +
annotate("text", x=1.5, y=6.5, fontface = "bold", size = 3.5, color = "gray40",
label= paste("***\np =", round(frac_abund_wilcox$p.value, digits = 6))) +
theme(legend.position = "none",
axis.title.x = element_blank())
######################################################### TOTAL PRODUCTION
totprod_wilcox <- wilcox.test(frac_bacprod ~ fraction, data = metadata)
totprod_wilcox##
## Wilcoxon rank sum test
##
## data: frac_bacprod by fraction
## W = 33, p-value = 0.02418
## alternative hypothesis: true location shift is not equal to 0
metadata %>%
group_by(fraction) %>%
summarize(mean(frac_bacprod))## # A tibble: 2 × 2
## fraction `mean(frac_bacprod)`
## <fctr> <dbl>
## 1 Particle 9.958333
## 2 Free 24.058333
# Make a data frame to draw significance line in boxplot (visually calculated)
dat2 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(67,68,68,67)) # WholePart vs WholeFree
poster_b <- ggplot(metadata, aes(y = frac_bacprod, x = fraction)) +
#scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
ylab("Heterotrophic Production (μgC/L/hr)") +
##### Particle vs free bulk production
geom_path(data = dat2, aes(x = a, y = b), linetype = 1, color = "gray40") +
annotate("text", x=1.5, y=68, fontface = "bold", size = 3.5, color = "gray40",
label= paste("**\np =", round(totprod_wilcox$p.value, digits = 3))) +
theme(legend.position = "none",
axis.title.x = element_blank())
######################################################### TOTAL PRODUCTION
percellprod_wilcox <- wilcox.test(log10(fracprod_per_cell) ~ fraction, data = filter(metadata, norep_filter_name != "MOTEJ515"))
percellprod_wilcox##
## Wilcoxon rank sum test
##
## data: log10(fracprod_per_cell) by fraction
## W = 125, p-value = 6.656e-05
## alternative hypothesis: true location shift is not equal to 0
filter(metadata, norep_filter_name != "MOTEJ515") %>%
group_by(fraction) %>%
summarize(mean(fracprod_per_cell))## # A tibble: 2 × 2
## fraction `mean(fracprod_per_cell)`
## <fctr> <dbl>
## 1 Particle 4.816116e-07
## 2 Free 3.866798e-08
# Make a data frame to draw significance line in boxplot (visually calculated)
dat3 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(-5.05,-5,-5,-5.05)) # WholePart vs WholeFree
poster_c <- ggplot(filter(metadata, norep_filter_name != "MOTEJ515"),
aes(y = log10(fracprod_per_cell), x = fraction)) +
#scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
ylim(c(-8.5, -5)) +
ylab("log10(production/cell) (μgC/cell/hr)") +
##### Particle vs free per-cell production
geom_path(data = dat3, aes(x = a, y = b), linetype = 1, color = "gray40") +
annotate("text", x=1.5, y=-5, fontface = "bold", size = 3.5, color = "gray40",
label= paste("***\np =", round(percellprod_wilcox$p.value, digits = 5))) +
theme(legend.position = "none",
axis.title.x = element_blank())
plot_grid(poster_a, poster_b, poster_c,
labels = c("A", "B", "C"),
ncol = 3)work_df <- metadata %>%
dplyr::select(norep_filter_name, fraction, fraction_bac_abund, frac_bacprod, fracprod_per_cell_noinf) %>%
mutate(norep_water_name = paste(substr(norep_filter_name, 1,4), substr(norep_filter_name, 6,9), sep = "")) %>%
dplyr::select(-norep_filter_name)
part_work_df <- work_df %>%
filter(fraction == "Particle") %>%
rename(part_bacabund = fraction_bac_abund,
part_prod = frac_bacprod,
part_percell_prod = fracprod_per_cell_noinf) %>%
dplyr::select(-fraction)
free_work_df <- work_df %>%
filter(fraction == "Free") %>%
rename(free_bacabund = fraction_bac_abund,
free_prod = frac_bacprod,
free_percell_prod = fracprod_per_cell_noinf) %>%
dplyr::select(-fraction)
byfrac_df <- part_work_df %>%
left_join(free_work_df, by = "norep_water_name")
summary(lm(log10(part_bacabund) ~ log10(free_bacabund), data = filter(byfrac_df, norep_water_name != "MOTE515")))##
## Call:
## lm(formula = log10(part_bacabund) ~ log10(free_bacabund), data = filter(byfrac_df,
## norep_water_name != "MOTE515"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.52690 -0.08048 0.05903 0.19565 0.35255
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.0900 3.2247 0.648 0.533
## log10(free_bacabund) 0.4264 0.5532 0.771 0.461
##
## Residual standard error: 0.3111 on 9 degrees of freedom
## Multiple R-squared: 0.06194, Adjusted R-squared: -0.04229
## F-statistic: 0.5943 on 1 and 9 DF, p-value: 0.4605
plot1 <- ggplot(filter(byfrac_df, norep_water_name != "MOTE515"),
aes(x = log10(free_bacabund), y = log10(part_bacabund))) +
xlab("Free") + ylab("Particle") +
ggtitle("Log10(cells/mL)") +
geom_point(size = 3, shape = 21, fill = "grey")
lm_prod_corr <- lm(part_prod ~ free_prod, data = byfrac_df)
plot2 <- ggplot(byfrac_df, aes(x = free_prod, y = part_prod)) +
xlab("Free") + ylab("Particle") +
ggtitle("Heterotrophic Production \n(μgC/L/hr)") +
geom_point(size = 3, shape = 21, fill = "grey") +
geom_smooth(method = "lm", color = "black") +
annotate("text", x =20, y= 30, color = "black", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_prod_corr)$adj.r.squared, digits = 3), "\n",
"p =", round(unname(summary(lm_prod_corr)$coefficients[,4][2]), digits = 3)))
lm_percell_corr <- lm(log10(part_percell_prod) ~ log10(free_percell_prod), data = byfrac_df)
plot3 <- ggplot(byfrac_df,
aes(x = log10(free_percell_prod), y = log10(part_percell_prod))) +
xlab("Free") + ylab("Particle") +
ggtitle("log10(production/cell) \n (μgC/cell/hr)") +
geom_point(size = 3, shape = 21, fill = "grey") +
geom_smooth(method = "lm", color = "black") +
annotate("text", y = -5.8, x=-7.9, color = "black", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_percell_corr)$adj.r.squared, digits = 3), "\n",
"p =", round(unname(summary(lm_percell_corr)$coefficients[,4][2]), digits = 3)))
plot_grid(plot1, plot2, plot3,
nrow = 1, ncol = 3,
labels = c("A", "B", "C"),
align = "h")## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on '(μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on '(μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
# Set the seed for reproducibility
set.seed(777)
# Calculate the alpha diversity with 100 repsampling events
alpha_calc <- calc_alpha_diversity(physeq = surface_PAFL_otu_pruned_notree_rm2)
# What was the minimum sample size?
min(sample_sums(surface_PAFL_otu_pruned_notree_rm2)) - 1## [1] 6489
# Put it altogether in a dataframe
otu_alphadiv <- calc_mean_alphadiv(physeq = surface_PAFL_otu_pruned_notree_rm2,
richness_df = alpha_calc$Richness,
evenness_df = alpha_calc$Inverse_Simpson,
shannon_df = alpha_calc$Shannon) %>%
mutate(fraction = factor(fraction, levels = c("WholePart","WholeFree")),
lakesite = factor(lakesite, levels = c("MOT", "MDP", "MBR", "MIN")),
measure = factor(measure, levels = c("Richness", "Shannon_Entropy", "Inverse_Simpson", "Simpsons_Evenness")),
fraction = fct_recode(fraction, Particle = "WholePart", Free = "WholeFree"),
lakesite = fct_recode(lakesite, Outlet = "MOT", Deep = "MDP", Bear = "MBR", River = "MIN"))##########################################################################
########################### CORRELATIONS #############################
##########################################################################
# RICHNESS vs SHANNON
cor(filter(otu_alphadiv, measure == "Richness" & fraction == "Particle")$mean,
filter(otu_alphadiv, measure == "Shannon_Entropy" & fraction == "Particle")$mean) # YES## [1] 0.9406491
# SHANNON VS INVERSE SIMPSON
cor(filter(otu_alphadiv, measure == "Shannon_Entropy" & fraction == "Particle")$mean,
filter(otu_alphadiv, measure == "Inverse_Simpson" & fraction == "Particle")$mean) # YES## [1] 0.9651242
# INVERSE SIMPSON VS SIMPSONS EVENNESS
cor(filter(otu_alphadiv, measure == "Inverse_Simpson" & fraction == "Particle")$mean,
filter(otu_alphadiv, measure == "Simpsons_Evenness" & fraction == "Particle")$mean) # YES## [1] 0.9145095
# SIMPSONS EVENNESS VS RICHNESS
cor(filter(otu_alphadiv, measure == "Simpsons_Evenness" & fraction == "Particle")$mean,
filter(otu_alphadiv, measure == "Richness" & fraction == "Particle")$mean) # YES## [1] 0.6881205
ggplot(otu_alphadiv, aes(y = mean, x = fraction)) +
ylab("Mean Diversity Value") +
facet_wrap(~measure, scale = "free_y", ncol = 4) +
geom_jitter(size = 3, shape = 21, aes(fill = fraction), color = "black") +
geom_boxplot(alpha = 0.5, outlier.shape = NA, color = "black", aes(fill = fraction)) +
scale_fill_manual(values = fraction_colors) +
scale_color_manual(values = fraction_colors) +
theme(legend.position = "none", axis.title.x = element_blank()) ggplot(filter(otu_alphadiv, measure == "Richness" & fraction == "Particle"),
aes(x = lakesite, y = mean, fill = lakesite)) +
geom_jitter(size = 3, shape = 21) +
ylab("Observed Richness") +
ggtitle("Particle Fraction Only") +
geom_boxplot(outlier.shape = NA, color = "black", alpha = 0.5) +
scale_fill_manual(values = lakesite_colors)otu_alphadiv %>%
filter(measure == "Richness" & fraction == "Particle") %>%
group_by(lakesite) %>%
summarize(mean(mean), sd(mean))## # A tibble: 4 × 3
## lakesite `mean(mean)` `sd(mean)`
## <fctr> <dbl> <dbl>
## 1 Outlet 430.8567 81.99728
## 2 Deep 472.0833 23.48010
## 3 Bear 637.9933 242.53768
## 4 River 686.2633 135.20325
ggplot(filter(otu_alphadiv, measure == "Inverse_Simpson" & fraction == "Particle"),
aes(x = lakesite, y = mean, fill = lakesite)) +
geom_jitter(size = 3, shape = 21) +
ggtitle("Particle Fraction Only") +
geom_boxplot(outlier.shape = NA, color = "black", alpha = 0.5) +
scale_fill_manual(values = lakesite_colors)otu_alphadiv %>%
filter(measure == "Inverse_Simpson" & fraction == "Particle") %>%
group_by(lakesite) %>%
summarize(mean(mean))## # A tibble: 4 × 2
## lakesite `mean(mean)`
## <fctr> <dbl>
## 1 Outlet 23.66717
## 2 Deep 23.76370
## 3 Bear 35.36997
## 4 River 59.10553
#################################### Subset Dataframes
richness <- filter(otu_alphadiv, measure == "Richness")
shannon <- filter(otu_alphadiv, measure == "Shannon_Entropy")
invsimps <- filter(otu_alphadiv, measure == "Inverse_Simpson")
simpseven <- filter(otu_alphadiv, measure == "Simpsons_Evenness")
#################################### Bulk Measure Production ####################################
################### Richness ###################
summary(lm(frac_bacprod ~ mean, data = richness)) # All samples together##
## Call:
## lm(formula = frac_bacprod ~ mean, data = richness)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.057 -12.017 -5.654 9.256 46.605
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.501168 9.039579 2.047 0.0528 .
## mean -0.003335 0.018911 -0.176 0.8616
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.54 on 22 degrees of freedom
## Multiple R-squared: 0.001412, Adjusted R-squared: -0.04398
## F-statistic: 0.0311 on 1 and 22 DF, p-value: 0.8616
# Particle-Associated
lm_prod_vs_rich_PA <- lm(frac_bacprod ~ mean, data = filter(richness, fraction == "Particle"))
summary(lm_prod_vs_rich_PA)##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(richness, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.1271 -2.8865 -0.4649 3.7537 9.8401
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -11.269531 5.717450 -1.971 0.07701 .
## mean 0.038125 0.009868 3.863 0.00314 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.476 on 10 degrees of freedom
## Multiple R-squared: 0.5988, Adjusted R-squared: 0.5587
## F-statistic: 14.93 on 1 and 10 DF, p-value: 0.003143
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_prod_vs_rich_PA <- train(
frac_bacprod ~ mean, data = filter(richness, fraction == "Particle"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_prod_vs_rich_PA$results # Particle-Associated CV results ## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 6.607118 0.6566796 2.271972 0.2865878
summary(lm(frac_bacprod ~ mean, data = filter(richness, fraction == "Free"))) # Free Living ##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(richness, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.094 -11.112 -2.755 8.611 33.308
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.40793 21.62006 0.250 0.808
## mean 0.05511 0.06207 0.888 0.395
##
## Residual standard error: 17.7 on 10 degrees of freedom
## Multiple R-squared: 0.07306, Adjusted R-squared: -0.01963
## F-statistic: 0.7882 on 1 and 10 DF, p-value: 0.3955
################### Shannon Entropy ###################
summary(lm(frac_bacprod ~ mean, data = shannon)) # All samples together##
## Call:
## lm(formula = frac_bacprod ~ mean, data = shannon)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.659 -11.878 -5.666 9.231 46.593
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.63373 26.71652 0.623 0.540
## mean 0.08797 6.22969 0.014 0.989
##
## Residual standard error: 15.55 on 22 degrees of freedom
## Multiple R-squared: 9.064e-06, Adjusted R-squared: -0.04545
## F-statistic: 0.0001994 on 1 and 22 DF, p-value: 0.9889
# Particle-Associated
lm_prod_vs_shannon_PA <- lm(frac_bacprod ~ mean, data = filter(shannon, fraction == "Particle"))
summary(lm_prod_vs_shannon_PA)##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(shannon, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.9022 -2.9150 -0.5875 1.6713 12.0544
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -40.320 14.007 -2.878 0.01643 *
## mean 11.089 3.068 3.614 0.00473 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.693 on 10 degrees of freedom
## Multiple R-squared: 0.5664, Adjusted R-squared: 0.5231
## F-statistic: 13.06 on 1 and 10 DF, p-value: 0.004734
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_prod_vs_shannon_PA <- train(
frac_bacprod ~ mean, data = filter(shannon, fraction == "Particle"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_prod_vs_shannon_PA$results # Particle-Associated CV results ## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 6.58008 0.6745309 2.49991 0.268315
summary(lm(frac_bacprod ~ mean, data = filter(shannon, fraction == "Free"))) # Free Living ##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(shannon, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.004 -10.708 -3.738 6.632 37.129
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -13.37 73.87 -0.181 0.860
## mean 9.40 18.50 0.508 0.622
##
## Residual standard error: 18.15 on 10 degrees of freedom
## Multiple R-squared: 0.02516, Adjusted R-squared: -0.07233
## F-statistic: 0.2581 on 1 and 10 DF, p-value: 0.6225
################### Inverse Simpson ###################
summary(lm(frac_bacprod ~ mean, data = invsimps)) # All samples together##
## Call:
## lm(formula = frac_bacprod ~ mean, data = invsimps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.269 -10.298 -4.916 5.866 46.452
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.1577 6.0225 2.019 0.0559 .
## mean 0.1629 0.1731 0.941 0.3570
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.25 on 22 degrees of freedom
## Multiple R-squared: 0.03868, Adjusted R-squared: -0.005019
## F-statistic: 0.8851 on 1 and 22 DF, p-value: 0.357
# Particle-Associated samples
lm_prod_vs_invsimps_PA <- lm(frac_bacprod ~ mean, data = filter(invsimps, fraction == "Particle"))
summary(lm_prod_vs_invsimps_PA)##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(invsimps, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.5589 -2.1093 -0.1969 0.8752 7.8282
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.45199 2.43866 -0.185 0.85666
## mean 0.29344 0.05781 5.076 0.00048 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.571 on 10 degrees of freedom
## Multiple R-squared: 0.7204, Adjusted R-squared: 0.6925
## F-statistic: 25.77 on 1 and 10 DF, p-value: 0.0004804
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_prod_vs_invsimps_PA <- train(
frac_bacprod ~ mean, data = filter(invsimps, fraction == "Particle"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_prod_vs_invsimps_PA$results # Cross Validated PA results## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 5.31148 0.7499916 2.119997 0.2619402
#Free Living Samples
summary(lm(frac_bacprod ~ mean, data = filter(invsimps, fraction == "Free")))##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(invsimps, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.765 -9.356 -4.445 6.116 36.017
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.0985 16.7052 0.664 0.521
## mean 0.5379 0.6598 0.815 0.434
##
## Residual standard error: 17.8 on 10 degrees of freedom
## Multiple R-squared: 0.06233, Adjusted R-squared: -0.03143
## F-statistic: 0.6648 on 1 and 10 DF, p-value: 0.4339
################### Simpson's Evenness ###################
summary(lm(frac_bacprod ~ mean, data = simpseven)) # All samples together##
## Call:
## lm(formula = frac_bacprod ~ mean, data = simpseven)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.000 -7.163 -3.815 3.392 45.845
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.8524 9.2286 -0.092 0.9272
## mean 274.1606 134.4253 2.040 0.0536 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.26 on 22 degrees of freedom
## Multiple R-squared: 0.159, Adjusted R-squared: 0.1208
## F-statistic: 4.16 on 1 and 22 DF, p-value: 0.05359
# Particle-Associated
lm_prod_vs_simpseven_PA <- lm(frac_bacprod ~ mean, data = filter(simpseven, fraction == "Particle"))
summary(lm_prod_vs_simpseven_PA)##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(simpseven, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.973 -2.229 -1.086 1.356 12.380
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.829 4.539 -0.844 0.41865
## mean 234.057 71.238 3.286 0.00821 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.995 on 10 degrees of freedom
## Multiple R-squared: 0.5191, Adjusted R-squared: 0.471
## F-statistic: 10.79 on 1 and 10 DF, p-value: 0.008211
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_prod_vs_simpseven_PA <- train(
frac_bacprod ~ mean, data = filter(simpseven, fraction == "Particle"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_prod_vs_simpseven_PA$results # Particle-Associated CV results ## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 6.239458 0.6493075 2.882057 0.2950225
summary(lm(frac_bacprod ~ mean, data = filter(simpseven, fraction == "Free"))) # Free Living ##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(simpseven, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.389 -12.029 -3.306 4.668 39.946
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.85 23.52 0.674 0.516
## mean 114.96 321.02 0.358 0.728
##
## Residual standard error: 18.27 on 10 degrees of freedom
## Multiple R-squared: 0.01266, Adjusted R-squared: -0.08607
## F-statistic: 0.1282 on 1 and 10 DF, p-value: 0.7277
#################################### Per-Cell Production ####################################
################### Richness ###################
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = richness)) # All samples together ##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = richness)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8668 -0.2124 0.1045 0.2133 0.6473
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.4591277 0.2212633 -38.231 < 2e-16 ***
## mean 0.0028988 0.0004641 6.246 3.4e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3804 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6501, Adjusted R-squared: 0.6334
## F-statistic: 39.02 on 1 and 21 DF, p-value: 3.395e-06
# Particle-Associated
lm_percell_prod_vs_rich_PA <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, fraction == "Particle"))
summary(lm_percell_prod_vs_rich_PA)##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.55347 -0.21545 -0.01066 0.12536 0.58830
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.0617648 0.3717671 -21.685 4.44e-09 ***
## mean 0.0023794 0.0006348 3.748 0.00457 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3506 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6095, Adjusted R-squared: 0.5662
## F-statistic: 14.05 on 1 and 9 DF, p-value: 0.004567
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_percell_prod_vs_rich_PA <- train(
log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, fraction == "Particle" & norep_filter_name != "MOTEJ515"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_percell_prod_vs_rich_PA$results # Particle-Associated CV results ## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 0.4185395 0.6324071 0.1481733 0.3240003
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, fraction == "Free"))) # Free Living ##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.71719 -0.13833 0.07155 0.23581 0.56949
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.135758 0.471253 -17.264 8.99e-09 ***
## mean 0.001657 0.001353 1.225 0.249
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3859 on 10 degrees of freedom
## Multiple R-squared: 0.1304, Adjusted R-squared: 0.04344
## F-statistic: 1.499 on 1 and 10 DF, p-value: 0.2488
################### Shannon Entropy ###################
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = shannon)) # All samples together ##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = shannon)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.03514 -0.15042 -0.03394 0.26568 0.82794
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.9036 0.7534 -14.472 2.14e-12 ***
## mean 0.8805 0.1763 4.993 6.09e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4348 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5428, Adjusted R-squared: 0.521
## F-statistic: 24.93 on 1 and 21 DF, p-value: 6.09e-05
# Particle-Associated
lm_percell_prod_vs_shannon_PA <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon, fraction == "Particle"))
summary(lm_percell_prod_vs_shannon_PA)##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.36686 -0.23571 -0.01330 0.03961 0.70312
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.8897 0.8820 -11.213 1.37e-06 ***
## mean 0.6993 0.1935 3.614 0.00562 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3584 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5921, Adjusted R-squared: 0.5468
## F-statistic: 13.06 on 1 and 9 DF, p-value: 0.00562
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_percell_prod_vs_shannon_PA <- train(
log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon, fraction == "Particle" & norep_filter_name != "MOTEJ515"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_percell_prod_vs_shannon_PA$results # Particle-Associated CV results ## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 0.4135852 0.6756987 0.1418017 0.2988697
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon, fraction == "Free"))) # Free Living ##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.72444 -0.17785 0.08114 0.13946 0.67366
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.8666 1.6332 -5.429 0.000289 ***
## mean 0.3243 0.4091 0.793 0.446298
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4014 on 10 degrees of freedom
## Multiple R-squared: 0.05914, Adjusted R-squared: -0.03495
## F-statistic: 0.6285 on 1 and 10 DF, p-value: 0.4463
################### Inverse Simpson ###################
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = invsimps)) # All samples together ##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = invsimps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.97302 -0.28199 -0.05285 0.32003 0.99088
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.86039 0.18153 -43.301 < 2e-16 ***
## mean 0.02355 0.00525 4.485 0.000204 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4596 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.4893, Adjusted R-squared: 0.4649
## F-statistic: 20.12 on 1 and 21 DF, p-value: 0.0002037
# Particle-Associated
lm_percell_prod_vs_invsimps_PA <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, fraction == "Particle"))
summary(lm_percell_prod_vs_invsimps_PA)##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.28651 -0.18384 -0.11125 0.07337 0.56444
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.360961 0.159490 -46.153 5.27e-12 ***
## mean 0.018087 0.003759 4.812 0.000958 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2969 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.7201, Adjusted R-squared: 0.689
## F-statistic: 23.15 on 1 and 9 DF, p-value: 0.0009581
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_percell_prod_vs_invsimps_PA <- train(
log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, fraction == "Particle" & norep_filter_name != "MOTEJ515"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_percell_prod_vs_invsimps_PA$results # Particle-Associated CV results ## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 0.3550179 0.7224563 0.1143556 0.3292516
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, fraction == "Free"))) # Free Living ##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68269 -0.11512 0.01325 0.17742 0.63175
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.03513 0.35685 -22.517 6.72e-10 ***
## mean 0.01910 0.01409 1.355 0.205
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3803 on 10 degrees of freedom
## Multiple R-squared: 0.1551, Adjusted R-squared: 0.07064
## F-statistic: 1.836 on 1 and 10 DF, p-value: 0.2052
################### Simpson's Evenness ###################
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = simpseven)) # All samples together ##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = simpseven)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.06874 -0.41920 0.04553 0.34511 1.56565
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.4496 0.4116 -18.10 2.74e-14 ***
## mean 4.3470 6.0344 0.72 0.479
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6353 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.02411, Adjusted R-squared: -0.02236
## F-statistic: 0.5189 on 1 and 21 DF, p-value: 0.4792
# Particle-Associated
lm_percell_prod_vs_simpseven_PA <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven, fraction == "Particle"))
summary(lm_percell_prod_vs_simpseven_PA)##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4726 -0.2230 -0.1106 0.1248 0.6887
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.5941 0.2885 -26.324 7.96e-10 ***
## mean 15.1887 4.6341 3.278 0.00957 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3788 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5441, Adjusted R-squared: 0.4935
## F-statistic: 10.74 on 1 and 9 DF, p-value: 0.009566
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_percell_prod_vs_simpseven_PA <- train(
log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven, fraction == "Particle" & norep_filter_name != "MOTEJ515"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_percell_prod_vs_simpseven_PA$results # Particle-Associated CV results ## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 0.4413381 0.6737451 0.1365141 0.2962024
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven, fraction == "Free"))) # Free Living ##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63112 -0.24216 0.01388 0.14672 0.77426
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.9274 0.5202 -15.240 3e-08 ***
## mean 4.9361 7.1008 0.695 0.503
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4041 on 10 degrees of freedom
## Multiple R-squared: 0.04609, Adjusted R-squared: -0.0493
## F-statistic: 0.4832 on 1 and 10 DF, p-value: 0.5028
########## PUT A TABLE TOGETHER
# Per Liter Production
perliter_row1 <- c("Richness", "Per-Liter",
round(summary(lm_prod_vs_rich_PA)$adj.r.squared, digits = 3),
round(cv_lm_prod_vs_rich_PA$results$Rsquared, digits = 3),
round(cv_lm_prod_vs_rich_PA$results$RsquaredSD, digits = 3))
perliter_row2 <- c("Shannon_Entropy", "Per-Liter",
round(summary(lm_prod_vs_shannon_PA)$adj.r.squared, digits = 3),
round(cv_lm_prod_vs_shannon_PA$results$Rsquared, digits = 3),
round(cv_lm_prod_vs_shannon_PA$results$RsquaredSD, digits = 3))
perliter_row3 <- c("Inverse_Simpson", "Per-Liter",
round(summary(lm_prod_vs_invsimps_PA)$adj.r.squared, digits = 3),
round(cv_lm_prod_vs_invsimps_PA$results$Rsquared, digits = 3),
round(cv_lm_prod_vs_invsimps_PA$results$RsquaredSD, digits = 3))
perliter_row4 <- c("Simpsons_Evenness","Per-Liter",
round(summary(lm_prod_vs_simpseven_PA)$adj.r.squared, digits = 3),
round(cv_lm_prod_vs_simpseven_PA$results$Rsquared, digits = 3),
round(cv_lm_prod_vs_simpseven_PA$results$RsquaredSD, digits = 3))
# Per cell production
percell_row1 <- c("Richness", "Per-Cell",
round(summary(lm_percell_prod_vs_rich_PA)$adj.r.squared, digits = 3),
round(cv_lm_percell_prod_vs_rich_PA$results$Rsquared, digits = 3),
round(cv_lm_percell_prod_vs_rich_PA$results$RsquaredSD, digits = 3))
percell_row2 <- c("Shannon_Entropy", "Per-Cell",
round(summary(lm_percell_prod_vs_shannon_PA)$adj.r.squared, digits = 3),
round(cv_lm_percell_prod_vs_shannon_PA$results$Rsquared, digits = 3),
round(cv_lm_percell_prod_vs_shannon_PA$results$RsquaredSD, digits = 3))
percell_row3 <- c("Inverse_Simpson", "Per-Cell",
round(summary(lm_percell_prod_vs_invsimps_PA)$adj.r.squared, digits = 3),
round(cv_lm_percell_prod_vs_invsimps_PA$results$Rsquared, digits = 3),
round(cv_lm_percell_prod_vs_invsimps_PA$results$RsquaredSD, digits = 3))
percell_row4 <- c("Simpsons_Evenness", "Per-Cell",
round(summary(lm_percell_prod_vs_simpseven_PA)$adj.r.squared, digits = 3),
round(cv_lm_percell_prod_vs_simpseven_PA$results$Rsquared, digits = 3),
round(cv_lm_percell_prod_vs_simpseven_PA$results$RsquaredSD, digits = 3))
r2_table <- as.data.frame(rbind(perliter_row1, perliter_row2, perliter_row3, perliter_row4,
percell_row1, percell_row2, percell_row3, percell_row4))
colnames(r2_table) <- c("Diversity_Metric", "Production_Measure","Adjusted_R2","CV_R2", "CV_R2_SD")
row.names(r2_table) = NULL
pander(r2_table,
caption = "Supplemental Table 1: \n R2 estimates for heterotrophic production vs particle-associated diversity linear regressions.")| Diversity_Metric | Production_Measure | Adjusted_R2 | CV_R2 | CV_R2_SD |
|---|---|---|---|---|
| Richness | Per-Liter | 0.559 | 0.657 | 0.287 |
| Shannon_Entropy | Per-Liter | 0.523 | 0.675 | 0.268 |
| Inverse_Simpson | Per-Liter | 0.692 | 0.75 | 0.262 |
| Simpsons_Evenness | Per-Liter | 0.471 | 0.649 | 0.295 |
| Richness | Per-Cell | 0.566 | 0.632 | 0.324 |
| Shannon_Entropy | Per-Cell | 0.547 | 0.676 | 0.299 |
| Inverse_Simpson | Per-Cell | 0.689 | 0.722 | 0.329 |
| Simpsons_Evenness | Per-Cell | 0.493 | 0.674 | 0.296 |
######################################################### OBSERVED RICHNESS
rich_fraction_wilcox <- wilcox.test(mean ~ fraction, data = richness)
rich_fraction_wilcox##
## Wilcoxon rank sum test
##
## data: mean by fraction
## W = 129, p-value = 0.0004955
## alternative hypothesis: true location shift is not equal to 0
filter(richness) %>%
group_by(fraction) %>%
summarize(mean(mean), sd(mean))## # A tibble: 2 × 3
## fraction `mean(mean)` `sd(mean)`
## <fctr> <dbl> <dbl>
## 1 Particle 556.7992 167.31404
## 2 Free 338.4242 85.98665
# Make a data frame to draw significance line in boxplot (visually calculated)
rich_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(920, 930, 930, 920)) # WholePart vs WholeFree
rich_distribution_plot <-
ggplot(richness, aes(y = mean, x = fraction)) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(150,950), breaks = seq(from = 0, to =925, by = 150)) +
xlab("Observed Richness") + xlab("Fraction") +
geom_path(data = rich_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=790, fontface = "bold", size = 4, color = "#424645",
label= paste("***\np =", round(rich_fraction_wilcox$p.value, digits = 5))) +
theme(legend.position = "none",# axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# Richness vs Per-Liter Heterotrophic Production Plot
prod_vs_rich_plot <-
ggplot(richness, aes(x=mean, y=frac_bacprod)) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) + # Y-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
xlab("Observed Richness") +
geom_smooth(data=filter(richness, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(150,950), breaks = seq(from = 0, to =925, by = 150)) +
annotate("text", x = 790, y=45, color = "#FF6600", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_prod_vs_rich_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_prod_vs_rich_PA)$coefficients[,4][2]), digits = 3))) +
theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank())
# Richness vs Per-cell herterotrophic production Plot
percell_prod_vs_rich_plot <-
ggplot(richness, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
ylab("log10(production/cell)\n (μgC/cell/hr)") +
xlab("Observed Richness") +
geom_smooth(data=filter(richness, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(150,950), breaks = seq(from = 0, to =925, by = 150)) +
#scale_y_continuous(limits = c(-8e-7,5e-6), breaks = seq(from = 0, to = 6e-7, by = 3e-7)) +
annotate("text", x = 790, y=-7.5, color = "#FF6600", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_percell_prod_vs_rich_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_percell_prod_vs_rich_PA)$coefficients[,4][2]), digits = 3))) +
#annotate("text", x = 250, y=55, color = "skyblue", fontface = "bold", label = paste("Free = NS")) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
# All 3 richness plots together
rich_plots <- plot_grid(rich_distribution_plot, prod_vs_rich_plot, percell_prod_vs_rich_plot,
labels = c("A", "C", "E"), ncol = 1, nrow = 3,
rel_heights = c(0.5, 0.8, 1.1),
align = "v")## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
######################################################### INVERSE SIMPSON
invsimps_fraction_wilcox <- wilcox.test(mean ~ fraction, data = invsimps)
invsimps_fraction_wilcox##
## Wilcoxon rank sum test
##
## data: mean by fraction
## W = 81, p-value = 0.6297
## alternative hypothesis: true location shift is not equal to 0
filter(invsimps) %>%
group_by(fraction) %>%
summarize(mean(mean), sd(mean))## # A tibble: 2 × 3
## fraction `mean(mean)` `sd(mean)`
## <fctr> <dbl> <dbl>
## 1 Particle 35.47659 23.843325
## 2 Free 24.09219 8.136901
# Make a data frame to draw significance line in boxplot (visually calculated)
invsimps_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(83,85,85,83)) # WholePart vs WholeFree
invsimps_distribution_plot <-
ggplot(invsimps, aes(y = mean, x = fraction)) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
ylab("Inverse Simpson") + xlab("Fraction") +
geom_path(data = invsimps_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=80, fontface = "bold", size = 4, color = "#424645", label= "NS") +
theme(legend.position = "none", #axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# INVERSE SIMPSON
# Plot Inverse Simpson vs Per-Liter Heterotrophic Production
prod_vs_invsimps_plot <-
ggplot(invsimps, aes(x=mean, y=frac_bacprod)) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) + # Y-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
xlab("Inverse Simpson") +
geom_smooth(data=filter(invsimps, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
annotate("text", x = 70, y=45, color = "#FF6600", fontface = "bold",size = 4,
label = paste("R2 =", round(summary(lm_prod_vs_invsimps_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_prod_vs_invsimps_PA)$coefficients[,4][2]), digits = 4))) +
theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank())
# Inverse Simpson vs per-cell production Plot
percell_prod_vs_invsimps_plot <-
ggplot(invsimps, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
ylab("log10(production/cell)\n (μgC/cell/hr)") +
xlab("Inverse Simpson") +
geom_smooth(data=filter(invsimps, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
#scale_y_continuous(limits = c(-8e-7,5e-6), breaks = seq(from = 0, to = 6e-7, by = 3e-7)) +
annotate("text", x = 70, y=-7.5, color = "#FF6600", fontface = "bold",size = 4,
label = paste("R2 =", round(summary(lm_percell_prod_vs_invsimps_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_percell_prod_vs_invsimps_PA)$coefficients[,4][2]), digits = 4))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
# Plot Inverse simpson plots altogether
invsimps_plots <- plot_grid(invsimps_distribution_plot, prod_vs_invsimps_plot, percell_prod_vs_invsimps_plot,
labels = c("B", "D", "F"), ncol = 1, nrow = 3,
rel_heights = c(0.5, 0.8, 1.1),
align = "v")## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
invsimps_plots_noyaxis <-
plot_grid(invsimps_distribution_plot + theme(axis.title.y = element_blank(), axis.text.y = element_blank()),
prod_vs_invsimps_plot + theme(axis.title.y = element_blank()),
percell_prod_vs_invsimps_plot + theme(axis.title.y = element_blank()),
labels = c("B", "D", "F"), ncol = 1, nrow = 3,
rel_heights = c(0.5, 0.8, 1.1),
align = "v")## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
plot_grid(rich_plots, invsimps_plots_noyaxis,
ncol = 2, nrow = 1, rel_widths = c(1, 0.825),
align = "h")######################################################### SHANNON_ENTROPY
shannon_fraction_wilcox <- wilcox.test(mean ~ fraction, data = shannon)
shannon_fraction_wilcox##
## Wilcoxon rank sum test
##
## data: mean by fraction
## W = 119, p-value = 0.00556
## alternative hypothesis: true location shift is not equal to 0
filter(shannon) %>%
group_by(fraction) %>%
summarize(mean(mean), sd(mean))## # A tibble: 2 × 3
## fraction `mean(mean)` `sd(mean)`
## <fctr> <dbl> <dbl>
## 1 Particle 4.534078 0.5594638
## 2 Free 3.982314 0.2958221
# Make a data frame to draw significance line in boxplot (visually calculated)
shannon_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(5.8, 5.9, 5.9, 5.8)) # WholePart vs WholeFree
shannon_distribution_plot <-
ggplot(shannon, aes(y = mean, x = fraction)) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(3.5,6), breaks = seq(from = 3, to =6, by = 0.5)) +
xlab("Shannon Entropy") +
xlab("Fraction") +
# Add line and pval
geom_path(data = shannon_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=5.6, fontface = "bold", size = 4, color = "#424645",
label= paste("***\np =", round(shannon_fraction_wilcox$p.value, digits = 3))) +
theme(legend.position = "none",# axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# Shannon vs Per-Liter Heterotrophic Production Plot
prod_vs_shannon_plot <-
ggplot(shannon, aes(x=mean, y=frac_bacprod)) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) + # Y-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
scale_x_continuous(limits = c(3.5,6), breaks = seq(from = 3, to =6, by = 0.5)) +
xlab("Shannon Entropy") +
geom_smooth(data=filter(shannon, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
annotate("text", x = 5.25, y=45, color = "#FF6600", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_prod_vs_shannon_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_prod_vs_shannon_PA)$coefficients[,4][2]), digits = 3))) +
theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank())
# Richness vs Per-cell herterotrophic production Plot
percell_prod_vs_shannon_plot <-
ggplot(shannon, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
ylab("log10(production/cell)\n (μgC/cell/hr)") +
xlab("Shannon Entropy") +
geom_smooth(data=filter(shannon, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(3.5,6), breaks = seq(from = 3, to =6, by = 0.5)) +
#scale_y_continuous(limits = c(-8e-7,5e-6), breaks = seq(from = 0, to = 6e-7, by = 3e-7)) +
annotate("text", x = 5.25, y=-7.5, color = "#FF6600", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_percell_prod_vs_shannon_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_percell_prod_vs_shannon_PA)$coefficients[,4][2]), digits = 3))) +
#annotate("text", x = 250, y=55, color = "skyblue", fontface = "bold", label = paste("Free = NS")) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
shannon_plots <- plot_grid(shannon_distribution_plot, prod_vs_shannon_plot, percell_prod_vs_shannon_plot,
labels = c("A", "C", "E"), ncol = 1, nrow = 3,
rel_heights = c(0.5, 0.8, 1.1),
align = "v")## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_errorbar).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_errorbarh).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
######################################################### INVERSE SIMPSON
simpseven_fraction_wilcox <- wilcox.test(mean ~ fraction, data = simpseven)
simpseven_fraction_wilcox##
## Wilcoxon rank sum test
##
## data: mean by fraction
## W = 55, p-value = 0.3474
## alternative hypothesis: true location shift is not equal to 0
filter(simpseven) %>%
group_by(fraction) %>%
summarize(mean(mean), sd(mean))## # A tibble: 2 × 3
## fraction `mean(mean)` `sd(mean)`
## <fctr> <dbl> <dbl>
## 1 Particle 0.05890559 0.02537484
## 2 Free 0.07138806 0.01716014
# Make a data frame to draw significance line in boxplot (visually calculated)
simpseven_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(0.14, 0.15, 0.15, 0.14)) # WholePart vs WholeFree
simpseven_distribution_plot <-
ggplot(simpseven, aes(y = mean, x = fraction)) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(0,0.15), breaks = seq(from = 0, to = 0.15, by = 0.03)) +
ylab("Simpson's Evenness") +
xlab("Fraction") +
geom_path(data = simpseven_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=0.14, fontface = "bold", size = 4, color = "#424645", label= "NS") +
theme(legend.position = "none",
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# SIMPSONS EVENNESS
# Plot Inverse Simpson vs Per-Liter Heterotrophic Production
prod_vs_simpseven_plot <-
ggplot(simpseven, aes(x=mean, y=frac_bacprod)) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) + # Y-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
ylab("Simpson's Evenness") +
geom_smooth(data=filter(simpseven, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(0,0.15), breaks = seq(from = 0, to = 0.15, by = 0.03)) +
annotate("text", x = 0.14, y=15, color = "#FF6600", fontface = "bold",size = 4,
label = paste("R2 =", round(summary(lm_prod_vs_simpseven_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_prod_vs_simpseven_PA)$coefficients[,4][2]), digits = 3))) +
theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank())
# Plot
percell_prod_vs_simpseven_plot <-
ggplot(simpseven, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
ylab("log10(production/cell)\n (μgC/cell/hr)") +
xlab("Simpson's Evenness") +
geom_smooth(data=filter(simpseven, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(0,0.15), breaks = seq(from = 0, to = 0.15, by = 0.03)) +
annotate("text", x = 0.14, y=-6.3, color = "#FF6600", fontface = "bold",size = 4,
label = paste("R2 =", round(summary(lm_percell_prod_vs_simpseven_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_percell_prod_vs_simpseven_PA)$coefficients[,4][2]), digits = 4))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
simpseven_plots <- plot_grid(simpseven_distribution_plot, prod_vs_simpseven_plot, percell_prod_vs_simpseven_plot,
labels = c("B", "D", "F"), ncol = 1, nrow = 3,
rel_heights = c(0.5, 0.8, 1.1),
align = "v")## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
simpseven_plots_noyaxis <-
plot_grid(simpseven_distribution_plot + theme(axis.title.y = element_blank(), axis.text.y = element_blank()),
prod_vs_simpseven_plot + theme(axis.title.y = element_blank()),
percell_prod_vs_simpseven_plot + theme(axis.title.y = element_blank()),
labels = c("B", "D", "F"), ncol = 1, nrow = 3,
rel_heights = c(0.5, 0.8, 1.1),
align = "v")## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
plot_grid(shannon_plots, simpseven_plots_noyaxis,
ncol = 2, nrow = 1, rel_widths = c(1, 0.825),
align = "h")lm_simpseven_bulkprod <- lm(frac_bacprod ~ mean,
data = filter(otu_alphadiv, measure == "Simpsons_Evenness"))
ggplot(filter(otu_alphadiv, measure == "Simpsons_Evenness"),
aes(x = mean, y = frac_bacprod)) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) + # Y-axis errorbars
ylab("Heterotrophic Production \n(ug C/L/hr)") +
xlab("Simpson's Evenness") +
geom_point(size = 3, shape = 21, aes(fill = fraction)) +
geom_smooth(method = "lm", color = "#424645", fill = "#424645", alpha = 0.3) +
scale_fill_manual(values = fraction_colors) +
scale_color_manual(values = fraction_colors) +
theme(legend.position = c(0.15, 0.9), legend.title = element_blank()) +
annotate("text", x = 0.115, y=55, color = "#424645", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_simpseven_bulkprod)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_simpseven_bulkprod)$coefficients[,4][2]), digits = 3))) plot_all_rich_percell <-
ggplot(richness, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), color = "grey", alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", fill = "grey") +
ylab("log10(production/cell)\n (μgC/cell/hr)") + xlab("Observed Richness") +
geom_smooth(method='lm', color = "#424645", fill = "#424645") +
scale_x_continuous(limits = c(150,950), breaks = seq(from = 0, to =925, by = 150)) +
scale_y_continuous(limits = c(-8.5, -5.4), breaks = seq(from = -9, to = -5, by = 1)) +
annotate("text", x = 790, y=-8, color = "#424645", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = richness))$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = richness))$coefficients[,4][2]), digits = 6))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
plot_all_shannon_percell <-
ggplot(shannon, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), color = "grey", alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", fill = "grey") +
ylab("log10(production/cell)\n (μgC/cell/hr)") + xlab("Shannon Entropy") +
geom_smooth(method='lm', color = "#424645", fill = "#424645") +
scale_x_continuous(limits = c(3.5,6), breaks = seq(from = 3, to =6, by = 0.5)) +
scale_y_continuous(limits = c(-8.5, -5.4), breaks = seq(from = -9, to = -5, by = 1)) +
annotate("text", x = 5.25, y=-8, color = "#424645", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = shannon))$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = shannon))$coefficients[,4][2]), digits = 5))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
plot_all_invsimps_percell <-
ggplot(invsimps, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), color = "grey", alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", fill = "grey") +
ylab("log10(production/cell)\n (μgC/cell/hr)") + xlab("Inverse Simpson") +
geom_smooth(method='lm', color = "#424645", fill = "#424645") +
scale_x_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
scale_y_continuous(limits = c(-8.5, -5.4), breaks = seq(from = -9, to = -5, by = 1)) +
annotate("text", x = 70, y=-8, color = "#424645", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = invsimps))$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = invsimps))$coefficients[,4][2]), digits = 5))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
plot_all_simpseven_percell <-
ggplot(simpseven, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), color = "grey", alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", fill = "grey") +
ylab("log10(production/cell)\n (μgC/cell/hr)") + xlab("Inverse Simpson") +
geom_smooth(method='lm', color = "#424645", fill = "#424645") +
scale_x_continuous(limits = c(0,0.15), breaks = seq(from = 0, to = 0.15, by = 0.03)) +
scale_y_continuous(limits = c(-8.5, -5.4), breaks = seq(from = -9, to = -5, by = 1)) +
annotate("text", x = 0.12, y=-8, color = "#424645", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = simpseven))$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = simpseven))$coefficients[,4][2]), digits = 2))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
plot_grid(plot_all_rich_percell,
plot_all_shannon_percell + theme(axis.title.y = element_blank()),
plot_all_invsimps_percell + theme(axis.title.y = element_blank()),
plot_all_simpseven_percell + theme(axis.title.y = element_blank()),
align = "h", labels = c("A", "B", "C", "D"),
rel_widths = c(1.05, 0.9, 0.9, 0.9),
nrow = 1, ncol = 4)## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_errorbarh).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
# Read in the tree
RAREFIED_rm2_fasttree <- read.tree(file = "data/PhyloTree/newick_tree_rm2_rmN.tre")
# Load in data that has doubletons removed
load("data/PhyloTree/surface_PAFL_otu_pruned_RAREFIED_rm2.RData")
surface_PAFL_otu_pruned_RAREFIED_rm2## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 1891 taxa and 24 samples ]
## sample_data() Sample Data: [ 24 samples by 70 sample variables ]
## tax_table() Taxonomy Table: [ 1891 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 1891 tips and 1889 internal nodes ]
# Create the OTU table for picante
surface_PAFL_RAREFIED_rm2_otu <- matrix(otu_table(surface_PAFL_otu_pruned_RAREFIED_rm2), nrow = nrow(otu_table(surface_PAFL_otu_pruned_RAREFIED_rm2)))
rownames(surface_PAFL_RAREFIED_rm2_otu) <- sample_names(surface_PAFL_otu_pruned_RAREFIED_rm2)
colnames(surface_PAFL_RAREFIED_rm2_otu) <- taxa_names(surface_PAFL_otu_pruned_RAREFIED_rm2)
## Calculate input for SES_MPD
# Convert the abundance data to standardized abundanced vegan function `decostand' , NOTE: method = "total"
otu_decostand_total <- decostand(surface_PAFL_RAREFIED_rm2_otu, method = "total")
# check total abundance in each sample
apply(otu_decostand_total, 1, sum)## MBREJ515 MBREJ715 MBREJ915 MBREK515 MBREK715 MBREK915 MDPEJ515 MDPEJ715 MDPEJ915 MDPEK515 MDPEK715 MDPEK915 MINEJ515 MINEJ715 MINEJ915 MINEK515 MINEK715 MINEK915 MOTEJ515 MOTEJ715 MOTEJ915 MOTEK515 MOTEK715 MOTEK915
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# check for mismatches/missing species between community data and phylo tree
RAREFIED_rm2_matches <- match.phylo.comm(RAREFIED_rm2_fasttree, otu_decostand_total)
# the resulting object is a list with $phy and $comm elements. replace our
# original data with the sorted/matched data
phy_RAREFIED_rm2 <- RAREFIED_rm2_matches$phy
comm_RAREFIED_rm2 <- RAREFIED_rm2_matches$comm
# Calculate the phylogenetic distances
phy_dist_RAREFIED_rm2 <- cophenetic(phy_RAREFIED_rm2)
## Calculate SES_MPD
###################################### INDEPENDENT SWAP ############################################
# calculate standardized effect size mean pairwise distance (ses.mpd)
unweighted_sesMPD_indepswap_RAREFIED_rm2 <- ses.mpd(comm_RAREFIED_rm2, phy_dist_RAREFIED_rm2, null.model = "independentswap",
abundance.weighted = FALSE, runs = 999)
WEIGHTED_sesMPD_indepswap_RAREFIED_rm2 <- ses.mpd(comm_RAREFIED_rm2, phy_dist_RAREFIED_rm2, null.model = "independentswap",
abundance.weighted = TRUE, runs = 999)
# Gather div info
rich_df <- filter(otu_alphadiv, measure == "Richness") %>%
dplyr::select(norep_filter_name, mean, sd, frac_bacprod, SD_frac_bacprod, fracprod_per_cell_noinf)
invsimps_df <- filter(otu_alphadiv, measure == "Inverse_Simpson") %>%
dplyr::select(norep_filter_name, mean, sd, frac_bacprod, SD_frac_bacprod, fracprod_per_cell_noinf)
# Prepare to be merged with each other
unweighted_df <- unweighted_sesMPD_indepswap_RAREFIED_rm2 %>%
rownames_to_column("names") %>%
mutate(phylo_measure = "Unweighted_SESMPD") %>%
make_metadata_norep() %>%
mutate(fraction = fct_recode(fraction, Particle = "WholePart", Free = "WholeFree")) %>%
rename(norep_filter_name = names) %>%
left_join(rich_df, by = "norep_filter_name") %>%
mutate(lakesite = factor(lakesite, levels = c("MOT", "MDP","MBR", "MIN")),
lakesite = fct_recode(lakesite, Outlet = "MOT", Deep = "MDP", Bear = "MBR", River = "MIN"))## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining factor and character vector, coercing into character vector
weighted_df <- WEIGHTED_sesMPD_indepswap_RAREFIED_rm2 %>%
rownames_to_column("names") %>%
mutate(phylo_measure = "Weighted_SESMPD") %>%
make_metadata_norep() %>%
mutate(fraction = fct_recode(fraction, Particle = "WholePart", Free = "WholeFree")) %>%
rename(norep_filter_name = names) %>%
left_join(invsimps_df, by = "norep_filter_name") %>%
mutate(lakesite = factor(lakesite, levels = c("MOT", "MDP","MBR", "MIN")),
lakesite = fct_recode(lakesite, Outlet = "MOT", Deep = "MDP", Bear = "MBR", River = "MIN"))## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining factor and character vector, coercing into character vector
######################################################### SES MPD DISTRIBUTION: UNWEIGHTED
unweighted_fraction_wilcox <- wilcox.test(mpd.obs.z ~ fraction, data = unweighted_df)
unweighted_fraction_wilcox##
## Wilcoxon rank sum test
##
## data: mpd.obs.z by fraction
## W = 30, p-value = 0.01449
## alternative hypothesis: true location shift is not equal to 0
unweighted_df %>%
group_by(fraction) %>%
summarize(mean(mpd.obs.z))## # A tibble: 2 × 2
## fraction `mean(mpd.obs.z)`
## <fctr> <dbl>
## 1 Particle -0.4970703
## 2 Free 0.4375166
# Make a data frame to draw significance line in boxplot (visually calculated)
dat4 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(1.6,1.7,1.7,1.6)) # WholePart vs WholeFree
unweight_distribution_plot <-
ggplot(unweighted_df, aes(y = mpd.obs.z, x = fraction)) +
#scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
xlab("Fraction") +
geom_hline(yintercept = 0, linetype = "dashed", size = 1.5) +
##### Particle vs free per-cell production
geom_path(data = dat4, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=1.35, fontface = "bold", size = 4, color = "#424645",
label= paste("***\np =", round(unweighted_fraction_wilcox$p.value, digits = 2))) +
theme(legend.position = "none",# axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# Is there a relationship between richness and Unweighted Mean Pairwise distance?
summary(lm(mean ~ mpd.obs.z, data = unweighted_df))##
## Call:
## lm(formula = mean ~ mpd.obs.z, data = unweighted_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -195.66 -96.12 -14.17 80.15 295.24
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 443.89 28.29 15.690 1.98e-13 ***
## mpd.obs.z -124.90 34.37 -3.634 0.00147 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 138.5 on 22 degrees of freedom
## Multiple R-squared: 0.3751, Adjusted R-squared: 0.3467
## F-statistic: 13.2 on 1 and 22 DF, p-value: 0.001467
summary(lm(mean ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Particle")))##
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(unweighted_df, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -191.75 -112.16 -41.91 55.88 278.93
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 515.16 51.16 10.069 1.49e-06 ***
## mpd.obs.z -83.76 49.91 -1.678 0.124
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 155 on 10 degrees of freedom
## Multiple R-squared: 0.2198, Adjusted R-squared: 0.1417
## F-statistic: 2.816 on 1 and 10 DF, p-value: 0.1242
summary(lm(mean ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Free")))##
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(unweighted_df, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -100.15 -66.22 -18.72 43.66 172.93
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 337.075 42.750 7.885 1.34e-05 ***
## mpd.obs.z 3.083 77.507 0.040 0.969
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 90.18 on 10 degrees of freedom
## Multiple R-squared: 0.0001582, Adjusted R-squared: -0.09983
## F-statistic: 0.001583 on 1 and 10 DF, p-value: 0.9691
unweight_rich_vs_mpd_plot <-
ggplot(unweighted_df, aes(y = mean, x = mpd.obs.z)) +
geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, shape = 21, aes(fill = fraction)) + ylab("Observed Richness") +
xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
geom_smooth(method = "lm", color = "#424645", fill = "#424645", alpha = 0.3) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
annotate("text", x = 0.75, y=750, color = "#424645", fontface = "bold",
label = paste("R2 =", round(summary(lm(mean ~ mpd.obs.z, data = unweighted_df))$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm(mean ~ mpd.obs.z, data = unweighted_df))$coefficients[,4][2]), digits = 3))) +
theme(legend.title = element_blank(), legend.position = "none",
axis.text.x = element_blank(), axis.title.x = element_blank())
# Is there a relationship between Production and Unweighted Mean Pairwise distance?
#summary(lm(frac_bacprod ~ mpd.obs.z, data = unweighted_df)) # NS
summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Particle")))##
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.144 -4.312 -1.822 3.403 19.527
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.213 2.617 3.139 0.0105 *
## mpd.obs.z -3.510 2.553 -1.375 0.1991
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.928 on 10 degrees of freedom
## Multiple R-squared: 0.159, Adjusted R-squared: 0.07492
## F-statistic: 1.891 on 1 and 10 DF, p-value: 0.1991
summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Free")))##
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.003 -14.934 2.196 8.756 36.997
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.183 8.396 2.166 0.0556 .
## mpd.obs.z 13.428 15.223 0.882 0.3984
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.71 on 10 degrees of freedom
## Multiple R-squared: 0.07219, Adjusted R-squared: -0.02059
## F-statistic: 0.7781 on 1 and 10 DF, p-value: 0.3984
unweight_prod_vs_mpd_plot <-
ggplot(unweighted_df, aes(y = frac_bacprod, x = mpd.obs.z, fill = fraction)) +
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, shape = 21) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
theme(legend.title = element_blank(), legend.position = "none",
axis.text.x = element_blank(), axis.title.x = element_blank())
# Is there a relationship between PER-CELL PRODUCTION and Unweighted Mean Pairwise distance?
unweight_vs_percell <- lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = unweighted_df)
summary(unweight_vs_percell)##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = unweighted_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7226 -0.2897 0.0040 0.1294 1.3193
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.1988 0.0999 -72.057 < 2e-16 ***
## mpd.obs.z -0.4972 0.1205 -4.127 0.00048 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4779 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.4478, Adjusted R-squared: 0.4215
## F-statistic: 17.03 on 1 and 21 DF, p-value: 0.0004795
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Particle")))##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.37972 -0.24190 -0.16526 0.04437 1.18153
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.9247 0.1711 -40.472 1.71e-11 ***
## mpd.obs.z -0.3298 0.1627 -2.027 0.0733 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4649 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.3135, Adjusted R-squared: 0.2372
## F-statistic: 4.109 on 1 and 9 DF, p-value: 0.07327
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Free")))##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63981 -0.29782 0.07682 0.17625 0.76499
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.55633 0.19602 -38.55 3.29e-12 ***
## mpd.obs.z -0.04279 0.35539 -0.12 0.907
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4135 on 10 degrees of freedom
## Multiple R-squared: 0.001447, Adjusted R-squared: -0.09841
## F-statistic: 0.01449 on 1 and 10 DF, p-value: 0.9066
unweight_percell_vs_mpd_plot <-
ggplot(unweighted_df,
aes(y = log10(fracprod_per_cell_noinf), x = mpd.obs.z)) +
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, shape = 21, aes(fill = fraction)) +
ylab("log10(Production/cell)\n (μgC/cell/hr)") +
xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
geom_smooth(method = "lm", color="#424645", fill = "#424645", alpha = 0.3) +
#stat_smooth(method="lm", se=TRUE, formula= y ~ poly(x, 2, raw=TRUE), color="#424645", fill = "#424645", alpha = 0.3) +
scale_y_continuous(limits = c(-8.5,-5), breaks = c(-8, -7, -6)) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
annotate("text", x = 0.75, y=-6, color = "#424645", fontface = "bold",
label = paste("R2 =", round(summary(unweight_vs_percell)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(unweight_vs_percell)$coefficients[,4][2]), digits = 4))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
plot_grid(unweight_distribution_plot, unweight_rich_vs_mpd_plot, unweight_prod_vs_mpd_plot, unweight_percell_vs_mpd_plot,
labels = c("A", "B", "C", "D"), ncol = 1, nrow = 4,
rel_heights = c(0.5, 0.8, 0.8, 1.2),
align = "v")## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
######################################################### SES MPD DISTRIBUTION: WEIGHTED
weighted_fraction_wilcox <- wilcox.test(mpd.obs.z ~ fraction, data = weighted_df)
weighted_fraction_wilcox##
## Wilcoxon rank sum test
##
## data: mpd.obs.z by fraction
## W = 81, p-value = 0.6297
## alternative hypothesis: true location shift is not equal to 0
filter(weighted_df) %>%
group_by(fraction) %>%
summarize(mean(mpd.obs.z))## # A tibble: 2 × 2
## fraction `mean(mpd.obs.z)`
## <fctr> <dbl>
## 1 Particle -0.3607944
## 2 Free -0.3854885
# Make a data frame to draw significance line in boxplot (visually calculated)
dat5 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(1.6,1.7,1.7,1.6)) # WholePart vs WholeFree
weight_distribution_plot <-
ggplot(weighted_df, aes(y = mpd.obs.z, x = fraction)) +
#scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
xlab("Fraction") +
geom_hline(yintercept = 0, linetype = "dashed", size = 1.5) +
##### Particle vs free per-cell production
geom_path(data = dat5, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=1.55, fontface = "bold", size = 3.5, color = "#424645", label= "NS") +
theme(legend.position = "none", #axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# Is there a relationship between inverse simpson and Weighted Mean Pairwise distance?
#summary(lm(mean ~ mpd.obs.z, data = weighted_df)) # NS
summary(lm(mean ~ mpd.obs.z, data = filter(weighted_df, fraction == "Particle")))##
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(weighted_df, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.42 -19.15 0.40 12.95 40.67
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.58 12.61 1.870 0.0911 .
## mpd.obs.z -32.98 29.43 -1.121 0.2886
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23.57 on 10 degrees of freedom
## Multiple R-squared: 0.1116, Adjusted R-squared: 0.02276
## F-statistic: 1.256 on 1 and 10 DF, p-value: 0.2886
summary(lm(mean ~ mpd.obs.z, data = filter(weighted_df, fraction == "Free")))##
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(weighted_df, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.0597 -4.3465 -0.8155 3.8832 18.4294
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.187 4.720 4.701 0.00084 ***
## mpd.obs.z -4.942 10.486 -0.471 0.64753
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.441 on 10 degrees of freedom
## Multiple R-squared: 0.02173, Adjusted R-squared: -0.0761
## F-statistic: 0.2221 on 1 and 10 DF, p-value: 0.6475
weight_invsimps_vs_mpd_plot <-
ggplot(weighted_df, aes(y = mean, x = mpd.obs.z, fill = fraction)) +
geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, shape = 21) + ylab("Inverse Simpson") +
#xlab("Standardized Effect Size \n Weighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
theme(legend.title = element_blank(), legend.position = "none",
axis.text.x = element_blank(), axis.title.x = element_blank())
# Is there a relationship between PER-LITER PRODUCTION and WEIGHTED Mean Pairwise distance?
summary(lm(frac_bacprod ~ mpd.obs.z, data = weighted_df))##
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = weighted_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.030 -10.236 -2.842 5.937 38.409
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.008 5.664 1.590 0.126
## mpd.obs.z -21.439 12.889 -1.663 0.110
##
## Residual standard error: 14.66 on 22 degrees of freedom
## Multiple R-squared: 0.1117, Adjusted R-squared: 0.07133
## F-statistic: 2.767 on 1 and 22 DF, p-value: 0.1104
summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(weighted_df, fraction == "Particle")))##
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(weighted_df,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.240 -4.575 -1.717 4.503 15.352
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.391 4.126 1.064 0.312
## mpd.obs.z -15.432 9.627 -1.603 0.140
##
## Residual standard error: 7.711 on 10 degrees of freedom
## Multiple R-squared: 0.2044, Adjusted R-squared: 0.1249
## F-statistic: 2.569 on 1 and 10 DF, p-value: 0.14
summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(weighted_df, fraction == "Free")))##
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(weighted_df,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.790 -12.275 -3.481 7.360 30.573
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.697 9.683 1.518 0.160
## mpd.obs.z -24.285 21.512 -1.129 0.285
##
## Residual standard error: 17.32 on 10 degrees of freedom
## Multiple R-squared: 0.113, Adjusted R-squared: 0.02434
## F-statistic: 1.274 on 1 and 10 DF, p-value: 0.2853
weight_prod_vs_mpd_plot <-
ggplot(weighted_df, aes(y = frac_bacprod, x = mpd.obs.z, fill = fraction)) +
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, shape = 21) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
xlab("Standardized Effect Size \n Weighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
theme(legend.title = element_blank(), legend.position = "none",
axis.text.x = element_blank(), axis.title.x = element_blank())
# Is there a relationship between PER-CELL PRODUCTION and WEIGHTED Mean Pairwise distance?
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = weighted_df))##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = weighted_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.94765 -0.40616 -0.03619 0.31885 1.39101
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.5352 0.2442 -30.860 <2e-16 ***
## mpd.obs.z -0.9519 0.5446 -1.748 0.0951 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6009 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.127, Adjusted R-squared: 0.08544
## F-statistic: 3.055 on 1 and 21 DF, p-value: 0.09508
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df, fraction == "Particle")))##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.65329 -0.32632 -0.03486 0.22224 0.95307
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.0845 0.3015 -23.496 2.18e-09 ***
## mpd.obs.z -0.9337 0.6753 -1.383 0.2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5096 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.1752, Adjusted R-squared: 0.08357
## F-statistic: 1.912 on 1 and 9 DF, p-value: 0.2001
lm_percell_free_mpd <- lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df, fraction == "Free"))
summary(lm_percell_free_mpd)##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.54189 -0.16159 -0.00204 0.20070 0.44618
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.9519 0.1848 -43.019 1.11e-12 ***
## mpd.obs.z -0.9777 0.4107 -2.381 0.0386 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3306 on 10 degrees of freedom
## Multiple R-squared: 0.3617, Adjusted R-squared: 0.2979
## F-statistic: 5.667 on 1 and 10 DF, p-value: 0.03857
weight_percell_vs_mpd_plot <-
ggplot(weighted_df,
aes(y = log10(fracprod_per_cell_noinf), x = mpd.obs.z, fill = fraction)) +
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, shape = 21) +
ylab("log10(Production/cell)\n (μgC/cell/hr)") +
xlab("Standardized Effect Size \n Weighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
geom_smooth(method = "lm", data = filter(weighted_df, fraction == "Free"), color = "skyblue", fill = "skyblue", alpha = 0.3) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
scale_y_continuous(limits = c(-8.5,-5), breaks = c(-8, -7, -6)) +
annotate("text", x = 0.3, y=-7.9, color = "skyblue", fontface = "bold",
label = paste("R2 =", round(summary(lm_percell_free_mpd)$adj.r.squared, digits = 3), "\n",
"p =", round(unname(summary(lm_percell_free_mpd)$coefficients[,4][2]), digits = 3))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
plot_grid(weight_distribution_plot, weight_invsimps_vs_mpd_plot, weight_prod_vs_mpd_plot, weight_percell_vs_mpd_plot,
labels = c("A", "B", "C", "D"), ncol = 1, nrow = 4,
rel_heights = c(0.5, 0.8, 0.8, 1.2),
align = "v")## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
# Test by station
part_unweighted_df <- filter(unweighted_df, fraction == "Particle")
kruskal.test(mpd.obs.z ~ lakesite, data = filter(unweighted_df, fraction == "Particle"))##
## Kruskal-Wallis rank sum test
##
## data: mpd.obs.z by lakesite
## Kruskal-Wallis chi-squared = 5.8718, df = 3, p-value = 0.118
unweighted_df %>%
filter(fraction == "Particle") %>%
group_by(lakesite) %>%
summarize(mean(mpd.obs.z), sd(mpd.obs.z))## # A tibble: 4 × 3
## lakesite `mean(mpd.obs.z)` `sd(mpd.obs.z)`
## <fctr> <dbl> <dbl>
## 1 Outlet 0.7950222 0.5856743
## 2 Deep -0.9216060 0.3564753
## 3 Bear -0.7817952 0.9425671
## 4 River -1.0799021 0.2412376
# Lakesite
plot_part_unweight_lakesite <- ggplot(filter(unweighted_df, fraction == "Particle"),
aes(y = mpd.obs.z, x = lakesite)) +
ggtitle("Particle-Associated Samples Only") +
scale_fill_manual(values = lakesite_colors) +
ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
geom_jitter(size = 3, shape = 21, aes(fill = lakesite), width = 0.2) +
geom_boxplot(aes(fill = lakesite), alpha = 0.5) +
theme(axis.title.x = element_blank(),
legend.position = c(0.85, 0.9), legend.title = element_blank())
# Season
plot_part_unweight_season <- ggplot(filter(unweighted_df, fraction == "Particle"),
aes(y = mpd.obs.z, x = season)) +
ggtitle("Particle-Associated Samples Only") +
ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
scale_fill_manual(values = season_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = season), width = 0.2) +
geom_boxplot(aes(fill = season), alpha = 0.5) +
theme(axis.title.x = element_blank(), axis.title.y = element_blank(),
legend.position = c(0.9, 0.9), legend.title = element_blank())
plot_grid(plot_part_unweight_lakesite, plot_part_unweight_season,
labels = c("A", "B"), ncol = 2, nrow = 1, align = "h")######################################################### Subset only richness data
### These are the richness values for the fake samples
#rich_stats <- filter(otu_alphadiv, measure == "Richness") %>%
# dplyr::select(1:2) %>%
# rename(mean_richness = mean) %>%
# mutate(sample = paste("Sample_", seq(1:nrow(filter(otu_alphadiv, measure == "Richness"))), sep = ""),
# mean_richness = matround(mean_richness))
## Pick OTUs to match these richness values
# List the otus from ALL samples
# all_otus <- taxa_names(surface_PAFL_otu_pruned_rm2)
# Obtain the OTU table from the phyloseq object
# otutab <- otu_table(surface_PAFL_otu_pruned_rm2)
# Make all the counts to be 0
# otutab_newvals <- apply(otutab, c(1, 2), function(x) 0)
# Stop if things are wrong
# stopifnot(colnames(otutab_newvals) == all_otus) # Do the OTU names match?
# stopifnot(rownames(otutab_newvals) == rich_stats$norep_filter_name) # Do the sample names match?
# Make it reproducible!
#set.seed(777)
#for (row in 1:nrow(rich_stats)) {
# Pick the richness value
# rich_val <- rich_stats[row, 2]
# Sample the OTUs to represent that richness value
# col_index <- sample(ncol(otutab_newvals), rich_val, replace = FALSE, prob = NULL)
# make all other columns 0
# otutab_newvals[row, col_index] <- 1
#}
## Calculate the tree for those randomized samples
# create a new phyloseq object
#random_physeq_presab_raw <- phyloseq(otu_table(otutab_newvals, taxa_are_rows = FALSE),
# tax_table(surface_PAFL_otu_pruned_rm2), sample_data(surface_PAFL_otu_pruned_rm2))
#random_physeq_presab_raw
# Remove taxa that are 0!
#random_physeq_presab_pruned <- prune_taxa(taxa_sums(random_physeq_presab_raw) > 0, random_physeq_presab_raw)
#random_physeq_presab_pruned
# Calculate tree
# Obtain the OTU names that were retained in the dataset
#tax <- data.frame(tax_table(random_physeq_presab_pruned))
#vector_of_otus <- as.vector(tax$OTU)
# Write out the file for processing/fasttree
# write(vector_of_otus, file = "data/PhyloTree/randomized/random_physeq_presab_pruned_OTUnames.txt", ncolumns = 1, append = FALSE, sep = "\n")# Read in the tree
randomized_tree <- read.tree(file = "data/PhyloTree/randomized/newick_tree_randomized_rmN.tre")
#random_physeq_presab_pruned_tree <- merge_phyloseq(random_physeq_presab_pruned, randomized_tree)
#random_physeq_presab_pruned_tree
#save(list="random_physeq_presab_pruned_tree", file=paste0("data/PhyloTree/randomized/random_physeq_presab_pruned_tree.RData"))
load("data/PhyloTree/randomized/random_physeq_presab_pruned_tree.RData")
random_physeq_presab_pruned_tree## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 2911 taxa and 24 samples ]
## sample_data() Sample Data: [ 24 samples by 44 sample variables ]
## tax_table() Taxonomy Table: [ 2911 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 2911 tips and 2909 internal nodes ]
# First force the OTU
randomized_otu <- matrix(otu_table(random_physeq_presab_pruned_tree), nrow = nrow(otu_table(random_physeq_presab_pruned_tree)))
rownames(randomized_otu) <- sample_names(random_physeq_presab_pruned_tree)
colnames(randomized_otu) <- taxa_names(random_physeq_presab_pruned_tree)
## Calculate input for SES_MPD
# Convert the presence/absence data to standardized abundanced vegan function `decostand' , NOTE: method = "pa"
otu_decostand <- decostand(randomized_otu, method = "pa")
# check total abundance in each sample
apply(otu_decostand, 1, sum)## MBREJ515 MBREJ715 MBREJ915 MBREK515 MBREK715 MBREK915 MDPEJ515 MDPEJ715 MDPEJ915 MDPEK515 MDPEK715 MDPEK915 MINEJ515 MINEJ715 MINEJ915 MINEK515 MINEK715 MINEK915 MOTEJ515 MOTEJ715 MOTEJ915 MOTEK515 MOTEK715 MOTEK915
## 906 574 434 268 256 358 493 447 476 276 284 381 840 632 586 452 383 511 505 343 444 274 238 381
# check for mismatches/missing species between community data and phylo tree
randomized_matches <- match.phylo.comm(randomized_tree, otu_decostand)
# the resulting object is a list with $phy and $comm elements. replace our
# original data with the sorted/matched data
phy_randomized_rm2 <- randomized_matches$phy
comm_randomized_rm2 <- randomized_matches$comm
# Calculate the phylogenetic distances
phy_dist_randomized_rm2 <- cophenetic(phy_randomized_rm2)
## Calculate SES_MPD
###################################### INDEPENDENT SWAP ############################################
# calculate standardized effect size mean pairwise distance (ses.mpd)
unweighted_sesMPD_indepswap_randomized <- ses.mpd(comm_randomized_rm2, phy_dist_randomized_rm2, null.model = "independentswap",
abundance.weighted = FALSE, runs = 999)
df <- unweighted_sesMPD_indepswap_randomized
df$names <- row.names(df)
df_2 <- make_metadata_norep(df) %>%
mutate(fraction = fct_recode(fraction, Particle = "WholePart", Free = "WholeFree"))
# Is there a relationship between richness and Unweighted Mean Pairwise distance?
summary(lm(ntaxa ~ mpd.obs.z, data = df_2))##
## Call:
## lm(formula = ntaxa ~ mpd.obs.z, data = df_2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -212.74 -129.44 -12.54 53.58 468.91
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 448.27 35.48 12.633 1.47e-11 ***
## mpd.obs.z 56.32 94.24 0.598 0.556
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 173.7 on 22 degrees of freedom
## Multiple R-squared: 0.01598, Adjusted R-squared: -0.02875
## F-statistic: 0.3572 on 1 and 22 DF, p-value: 0.5562
summary(lm(ntaxa ~ mpd.obs.z, data = filter(df_2, fraction == "Particle")))##
## Call:
## lm(formula = ntaxa ~ mpd.obs.z, data = filter(df_2, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -203.87 -109.15 -44.96 44.29 341.78
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 557.62 50.67 11.006 6.56e-07 ***
## mpd.obs.z -33.22 140.62 -0.236 0.818
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 175 on 10 degrees of freedom
## Multiple R-squared: 0.00555, Adjusted R-squared: -0.0939
## F-statistic: 0.05581 on 1 and 10 DF, p-value: 0.818
summary(lm(ntaxa ~ mpd.obs.z, data = filter(df_2, fraction == "Free")))##
## Call:
## lm(formula = ntaxa ~ mpd.obs.z, data = filter(df_2, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -107.76 -48.80 -17.85 56.29 159.66
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 342.48 24.62 13.913 7.19e-08 ***
## mpd.obs.z 74.88 62.79 1.193 0.261
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 84.48 on 10 degrees of freedom
## Multiple R-squared: 0.1245, Adjusted R-squared: 0.03699
## F-statistic: 1.422 on 1 and 10 DF, p-value: 0.2605
ggplot(df_2, aes(y = ntaxa, x = mpd.obs.z, fill = fraction)) +
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, shape = 21) + ylab("Randomized Richness") +
xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
scale_x_continuous(limits = c(-1,1)) +
theme(legend.title = element_blank(), legend.position = c(0.15, 0.9))